Use Faraday in WebsiteAgent and make HTTP backend library selectable.

Typhoeus is still the suggested default, but you can choose 'em_http'
or 'net_http' instead if you have a problem with it.

Akinori MUSHA 11 years ago
parent
commit
85a7369e65
5 changed files with 66 additions and 30 deletions
  1. 7 0
      .env.example
  2. 2 0
      Gemfile
  3. 4 0
      Gemfile.lock
  4. 49 28
      app/models/agents/website_agent.rb
  5. 4 2
      spec/models/agents/website_agent_spec.rb

+ 7 - 0
.env.example

@@ -82,6 +82,13 @@ AWS_SANDBOX=false
82 82
 #   Various Settings   #
83 83
 ########################
84 84
 
85
+# Specify the HTTP backend library for Faraday, used in WebsiteAgent.
86
+# You can change this depending on the performance and stability you
87
+# need for your service.  Any choice other than "typhoeus",
88
+# "net_http", or "em_http" requires you to bundle the corresponding
89
+# gem via your Gemfile.
90
+FARADAY_HTTP_BACKEND=typhoeus
91
+
85 92
 # Allow JSONPath eval expressions, e.g., $..price[?(@ < 20)]
86 93
 # You should not allow this on a shared Huginn box because it is not secure.
87 94
 ALLOW_JSONPATH_EVAL=false

+ 2 - 0
Gemfile

@@ -33,6 +33,8 @@ gem 'geokit', '~> 1.6.7'
33 33
 gem 'geokit-rails3', '~> 0.1.5'
34 34
 
35 35
 gem 'kramdown', '~> 1.1.0'
36
+gem 'faraday', '~> 0.9.0'
37
+gem 'faraday_middleware'
36 38
 gem 'typhoeus', '~> 0.6.3'
37 39
 gem 'nokogiri', '~> 1.6.0'
38 40
 

+ 4 - 0
Gemfile.lock

@@ -106,6 +106,8 @@ GEM
106 106
     execjs (2.0.2)
107 107
     faraday (0.9.0)
108 108
       multipart-post (>= 1.2, < 3)
109
+    faraday_middleware (0.9.1)
110
+      faraday (>= 0.7.4, < 0.10)
109 111
     ffi (1.9.3)
110 112
     forecast_io (2.0.0)
111 113
       faraday
@@ -316,6 +318,8 @@ DEPENDENCIES
316 318
   devise (~> 3.0.0)
317 319
   dotenv-rails
318 320
   em-http-request (~> 1.1.2)
321
+  faraday (~> 0.9.0)
322
+  faraday_middleware
319 323
   forecast_io (~> 2.0.0)
320 324
   foreman (~> 0.63.0)
321 325
   geokit (~> 1.6.7)

+ 49 - 28
app/models/agents/website_agent.rb

@@ -1,5 +1,6 @@
1 1
 require 'nokogiri'
2
-require 'typhoeus'
2
+require 'faraday'
3
+require 'faraday_middleware'
3 4
 require 'date'
4 5
 
5 6
 module Agents
@@ -38,7 +39,7 @@ module Agents
38 39
 
39 40
       Note that for all of the formats, whatever you extract MUST have the same number of matches for each extractor.  E.g., if you're extracting rows, all extractors must match all rows.  For generating CSS selectors, something like [SelectorGadget](http://selectorgadget.com) may be helpful.
40 41
 
41
-      Can be configured to use HTTP basic auth by including the `basic_auth` parameter with `username:password`.
42
+      Can be configured to use HTTP basic auth by including the `basic_auth` parameter with `"username:password"`, or `["username", "password"]`.
42 43
 
43 44
       Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent.  This is only used to set the "working" status.
44 45
 
@@ -103,34 +104,25 @@ module Agents
103 104
           errors.add(:base, "force_encoding must be a string")
104 105
         end
105 106
       end
107
+
108
+      begin
109
+        basic_auth_credentials()
110
+      rescue => e
111
+        errors.add(:base, e.message)
112
+      end
106 113
     end
107 114
 
108 115
     def check
109
-      log "Fetching #{options['url']}"
110 116
       check_url options['url']
111 117
     end
112 118
 
113 119
     def check_url(in_url)
114
-      hydra = Typhoeus::Hydra.new
115
-      request_opts = { :followlocation => true }
116
-      request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present?
117
-
118
-      requests = []
120
+      return unless in_url.present?
119 121
 
120
-      if in_url.kind_of?(Array)
121
-        in_url.each do |url|
122
-           requests.push(Typhoeus::Request.new(url, request_opts))
123
-        end
124
-      else
125
-        requests.push(Typhoeus::Request.new(in_url, request_opts))
126
-      end
127
-
128
-      requests.each do |request|
129
-        request.on_failure do |response|
130
-          error "Failed: #{response.inspect}"
131
-        end
132
-
133
-        request.on_success do |response|
122
+      Array(in_url).each do |url|
123
+        log "Fetching #{url}"
124
+        response = faraday.get(url)
125
+        if response.success?
134 126
           body = response.body
135 127
           if (encoding = options['force_encoding']).present?
136 128
             body = body.encode(Encoding::UTF_8, encoding)
@@ -183,14 +175,14 @@ module Agents
183 175
               error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}"
184 176
               return
185 177
             end
186
-        
178
+
187 179
             old_events = previous_payloads num_unique_lengths.first
188 180
             num_unique_lengths.first.times do |index|
189 181
               result = {}
190 182
               options['extract'].keys.each do |name|
191 183
                 result[name] = output[name][index]
192 184
                 if name.to_s == 'url'
193
-                  result[name] = URI.join(request.base_url, result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
185
+                  result[name] = (response.env[:url] + result[name]).to_s
194 186
                 end
195 187
               end
196 188
 
@@ -200,10 +192,9 @@ module Agents
200 192
               end
201 193
             end
202 194
           end
195
+        else
196
+          error "Failed: #{response.inspect}"
203 197
         end
204
-
205
-        hydra.queue request
206
-        hydra.run
207 198
       end
208 199
     end
209 200
 
@@ -288,6 +279,36 @@ module Agents
288 279
       end
289 280
     end
290 281
 
291
-  end
282
+    def faraday
283
+      @faraday ||= Faraday.new { |builder|
284
+        builder.use FaradayMiddleware::FollowRedirects
285
+        builder.request :url_encoded
286
+        if userinfo = basic_auth_credentials()
287
+          builder.request :basic_auth, *userinfo
288
+        end
292 289
 
290
+        case backend = faraday_backend
291
+        when :typhoeus
292
+          require 'typhoeus/adapters/faraday'
293
+        end
294
+        builder.adapter backend
295
+      }
296
+    end
297
+
298
+    def faraday_backend
299
+      ENV.fetch('FARADAY_HTTP_BACKEND', 'typhoeus').to_sym
300
+    end
301
+
302
+    def basic_auth_credentials
303
+      case value = options['basic_auth']
304
+      when nil, ''
305
+        return nil
306
+      when Array
307
+        return value if value.size == 2
308
+      when /:/
309
+        return value.split(/:/, 2)
310
+      end
311
+      raise "bad value for basic_auth: #{value.inspect}"
312
+    end
313
+  end
293 314
 end

+ 4 - 2
spec/models/agents/website_agent_spec.rb

@@ -348,7 +348,9 @@ describe Agents::WebsiteAgent do
348 348
 
349 349
   describe "checking with http basic auth" do
350 350
     before do
351
-      stub_request(:any, /user:pass/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
351
+      stub_request(:any, /example/).
352
+        with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m').chomp}" }).
353
+        to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
352 354
       @site = {
353 355
         'name' => "XKCD",
354 356
         'expected_update_period_in_days' => 2,
@@ -374,4 +376,4 @@ describe Agents::WebsiteAgent do
374 376
       end
375 377
     end
376 378
   end
377
-end
379
+end